library(knitr)
library(lubridate)
library(readr)
library(dplyr)
library(ggplot2)
library(plotly)
library(DT)
library(highcharter)
library(ggmap)
# The Kaggle download is named "other-Lyft_B02510.csv"; it was renamed to
# "Lyft.csv" before being read here.
lyft <- read_csv("Lyft.csv")

# Replace the header with short names; the fourth column (X4) is dropped below.
names(lyft) <- c("time_of_trip", "lat", "lon", "X4")

# Keep the timestamp and coordinates, parse the timestamp
# (month/day/year hour:minute) and derive the calendar fields used later on.
lyft <- lyft %>%
  select(time_of_trip, lat, lon) %>%
  mutate(
    time_of_trip = mdy_hm(time_of_trip),
    Day = day(time_of_trip),
    Month = month(time_of_trip, label = TRUE, abbr = FALSE),
    Year = year(time_of_trip),
    Hour = factor(hour(time_of_trip)),
    Weekday = wday(time_of_trip, label = TRUE, abbr = FALSE)
  )

# Centre the basemap on the mean pickup coordinate, ignoring missing values.
nyc <- c(
  lon = mean(lyft[["lon"]], na.rm = TRUE),
  lat = mean(lyft[["lat"]], na.rm = TRUE)
)
nyc_map <- get_map(location = nyc, zoom = 13)
This data set contains information about Lyft’s pickups in New York City from July 25, 2014 to September 30, 2014. There were a total of 267,701 pickups (rows) in 68 days. Source: Kaggle.
# Pickups per calendar month, with the first and last day of the month that
# appears in the data. All columns are turned into text for display: the
# counts get a thousands separator, the rest are converted with as.character().
n_month <- lyft %>%
  group_by(Month, Year) %>%
  summarise(Pickups = n(), first_day = min(Day), last_day = max(Day)) %>%
  # summarise() only peels off the last grouping variable, so the result would
  # still be grouped by Month; drop the grouping before reshaping for display.
  ungroup() %>%
  mutate(
    Year = as.character(Year),
    first_day = as.character(first_day),
    last_day = as.character(last_day),
    Pickups = prettyNum(Pickups, big.mark = ",")
  ) %>%
  # Reorder the columns and give the day columns readable headers.
  select(Year, Month, `First Day` = first_day, `Last Day` = last_day, Pickups)

kable(n_month)
| Year | Month | First Day | Last Day | Pickups |
|---|---|---|---|---|
| 2014 | July | 25 | 31 | 4,254 |
| 2014 | August | 1 | 31 | 147,448 |
| 2014 | September | 1 | 30 | 115,999 |
# Count the pickups recorded in each hour of the day.
n_hour <- summarise(group_by(lyft, Hour), Pickups = n())

# Interactive bar chart of the hourly counts (one bar per hour).
ggplotly(
  ggplot(data = n_hour, mapping = aes(x = Hour, y = Pickups)) +
    geom_bar(fill = "deeppink3", stat = "identity") +
    ggtitle("Number of Pickups per Hour")
)
This map displays all the Lyft pickups from July 25, 2014 to September 30, 2014. The highest demand for Lyft pickups occurs during 9pm - 12am and 12am - 3am. It was interesting to find out that the fewest pickups occur during 9am - 12pm!
D. Kahle and H. Wickham. ggmap: Spatial Visualization with ggplot2. The R Journal, 5(1), 144-161.
# Hour was created as a factor, so as.integer() yields its 1-based level
# codes, not the clock hour. The conversion is kept unchanged so that Hour
# holds the same values as before.
lyft$Hour <- as.integer(lyft$Hour)

# Assign every pickup to a three-hour segment of the day ("00-03" = 00:00 up
# to but not including 03:00, and so on). The segment is computed from the
# timestamp itself rather than from the factor codes in Hour: the codes only
# line up with clock hours when all 24 hours occur in the data, whereas the
# timestamp is always right. Rows with a missing timestamp get NA.
lyft <- lyft %>%
  mutate(
    Day_Segment = as.character(cut(
      hour(time_of_trip),
      breaks = seq(0, 24, by = 3),
      labels = c("00-03", "03-06", "06-09", "09-12",
                 "12-15", "15-18", "18-21", "21-24"),
      right = FALSE  # intervals are [0, 3), [3, 6), ..., [21, 24)
    ))
  )
# Pickups in the "00-03" segment, drawn as faint points over the NYC basemap.
# NOTE(review): ggmap() may capture base_layer unevaluated, so the data is
# kept in a top-level variable — confirm before wrapping this in a function.
lyft1 <- filter(lyft, Day_Segment == "00-03")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft1, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)
# Pickups in the "03-06" segment, drawn as faint points over the NYC basemap.
lyft2 <- filter(lyft, Day_Segment == "03-06")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft2, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)
# Pickups in the "06-09" segment, drawn as faint points over the NYC basemap.
lyft3 <- filter(lyft, Day_Segment == "06-09")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft3, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)
# Pickups in the "09-12" segment, drawn as faint points over the NYC basemap.
lyft4 <- filter(lyft, Day_Segment == "09-12")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft4, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)
# Pickups in the "12-15" segment, drawn as faint points over the NYC basemap.
lyft5 <- filter(lyft, Day_Segment == "12-15")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft5, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)
# Pickups in the "15-18" segment, drawn as faint points over the NYC basemap.
lyft6 <- filter(lyft, Day_Segment == "15-18")
ggmap(
  nyc_map,
  base_layer = ggplot(lyft6, aes(lon, lat)),
  extent = "device",
  legend = "none"
) +
  geom_point(colour = "deeppink3", size = 0.8, alpha = 1 / 20)